# -*- coding: utf-8 -*-
import urllib
import sys
import re

# Generates Java ``addScript(...)`` calls from the Unicode ``Scripts.txt`` data
# file: the file is downloaded, every code point (or code point range) is
# grouped by the script name it is assigned to, and one line per script is
# written to stdout. Runs on both Python 2 and Python 3.

url = 'http://www.unicode.org/Public/UNIDATA/Scripts.txt'

# Matches one data line of the form "<hex>[..<hex>] ; <name>":
#   group 1 - first code point (hex digits),
#   group 2 - last code point of a range (hex digits), None for a single point,
#   group 3 - script name (first whitespace-free token after the ';').
# Lines that do not start with hex digits (comments, blanks) never match.
_ENTRY_RE = re.compile(
    r"^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*([^\s]+)", re.MULTILINE)

try:
    from urllib.request import urlopen  # Python 3 location
except ImportError:
    urlopen = urllib.urlopen  # Python 2 location


def _fetch_text(address):
    """Download *address* and return its body as text.

    The connection is always closed, even if reading fails. Undecodable
    bytes are replaced rather than raising, so that an unexpected page still
    reaches the sanity check below instead of dying with a decode error.
    """
    response = urlopen(address)
    try:
        raw = response.read()
    finally:
        response.close()
    return raw.decode('utf-8', 'replace')


def _collect_intervals(text):
    """Return a dict mapping script name -> list of (first, last) hex strings.

    Intervals keep the order in which they appear in *text*. A single code
    point is stored as an interval whose two ends are equal.
    """
    intervals_by_script = {}
    for match in _ENTRY_RE.finditer(text):
        first, last, script = match.groups()
        if last is None:
            last = first
        intervals_by_script.setdefault(script, []).append((first, last))
    return intervals_by_script


def _format_add_script(script, intervals):
    """Return the Java ``addScript`` statement for one script."""
    ranges = ', '.join(
        'new IntInterval(0x%s, 0x%s)' % interval for interval in intervals)
    return 'addScript("%s", Arrays.asList(%s));' % (script, ranges)


page = _fetch_text(url)
if 'Unicode Character Database' not in page:
    # Report on stderr: stdout carries the generated Java code and must not
    # be polluted with diagnostics.
    sys.stderr.write("Seems we've downloaded wrong page\n")
    sys.exit(-1)

scripts = _collect_intervals(page)
for name in sorted(scripts):
    sys.stdout.write(_format_add_script(name, scripts[name]) + '\n')

